#############################################################################
## Digital Laboratories: Evaluating Online Volunteer Subject
## Pools for Social Science Research
## Authors: Ryan D. Enos, Mark Hill, Austin M. Strange

## The following code defines the functions used in the creation of Tables
## 1-4
#############################################################################

## merge DLABSS master demographics file with DLABSS ANESP replication data
## drop unwanted variables and name variables to keep
merge_master_anesp <- function(all_data, anesp){
  drops <- c("V1", "V2", "V3", "V4", "V7", "LocationAccuracy", "X")
  anesp <- anesp[-c(1),-which(names(anesp) %in% drops)]
  all_data <- all_data[-c(1), c("Age", "Birth.Year", "Confirm",
                                "Contact", "Country", "Education",
                                "Email", "English", "H.Origins",
                                "Hand", "Hispanic","Ideology",
                                "Language","Live.in.US","Occupation",
                                "Party","Party.2","Race","Residence",
                                "Salary","Zip.Code", "Zip.Code.2")]
  
  ## process email data so it is ready to merge
  anesp$V5 <- tolower(anesp$V5)
  all_data$Email <- tolower(all_data$Email)
  all_data$V5 <- all_data$Email
  ## merge by email
  merged_anesp <- merge(anesp, all_data, by = "V5")
  
  return(merged_anesp)
}

## recodes the raw data from the DLABSS ANESP replication to be used in analysis
recode_values <- function(anesp){
  
  anesp$female <- as.numeric(anesp$female)
  anesp$schooling <- as.numeric(anesp$schooling)
  
  
  anesp$marital[anesp$marital==1] <- "married"
  anesp$marital[anesp$marital==2] <- "divorced"
  anesp$marital[anesp$marital==3] <- "separated"
  anesp$marital[anesp$marital==4] <- "never married"
  anesp$marital[anesp$marital==5] <- "widowed"
  
  anesp$housing[anesp$housing==1] <- "own"
  anesp$housing[anesp$housing==2] <- "rent"
  anesp$housing[anesp$housing==3] <- "other"
  
  anesp$religion[anesp$religion==1] <- "protestant"
  anesp$religion[anesp$religion==2] <- "catholic"
  anesp$religion[anesp$religion==3] <- "jewish"
  anesp$religion[anesp$religion==4] <- "other"
  anesp$religion[anesp$religion==5] <- "none"
  
  anesp$registered <- as.numeric(anesp$registered)
  anesp$registered[anesp$registered==1] <- 1
  anesp$registered[anesp$registered==2] <- 0
  anesp$registered[anesp$registered==3] <- 99
  
  anesp$turnout <- as.numeric(anesp$turnout)
  anesp$turnout[anesp$turnout == 1] <- 1
  anesp$turnout[anesp$turnout == 2] <- 0
  anesp$turnout[anesp$turnout == 3] <- 99
  
  anesp$scale.2 <- as.numeric(anesp$scale.2)
  anesp$scale.3 <- as.numeric(anesp$scale.3)
  anesp$scale.4 <- as.numeric(anesp$scale.4)
  anesp$ideology.scale <- NA
  anesp$ideology.scale[anesp$scale.2==1] <- 1
  anesp$ideology.scale[anesp$scale.2==2] <- 2
  anesp$ideology.scale[anesp$scale.3==1] <- 7
  anesp$ideology.scale[anesp$scale.3==2] <- 6
  anesp$ideology.scale[anesp$scale.4==1] <- 3
  anesp$ideology.scale[anesp$scale.4==2] <- 5
  anesp$ideology.scale[anesp$scale.4==3] <- 4
  
  anesp$par.1[anesp$par.1==1] <- "republican"
  anesp$par.1[anesp$par.1==2] <- "democrat"
  anesp$par.1[anesp$par.1==3] <- "independent"
  anesp$par.1[anesp$par.1==4] <- "other"
  anesp$par.1[anesp$par.1==""] <- NA
  
  anesp$party.scale <- NA
  anesp$par.2 <- as.numeric(anesp$par.2)
  anesp$par.3 <- as.numeric(anesp$par.3)
  anesp$par.4 <- as.numeric(anesp$par.4)
  anesp$party.scale[anesp$par.2==1] <- 7
  anesp$party.scale[anesp$par.2==2] <- 6
  anesp$party.scale[anesp$par.3==1] <- 1
  anesp$party.scale[anesp$par.3==2] <- 2
  anesp$party.scale[anesp$par.4==1] <- 5
  anesp$party.scale[anesp$par.4==2] <- 3
  anesp$party.scale[anesp$par.4==3] <- 4
  
  anesp$interest <- as.numeric(anesp$interest)
  anesp$interest[anesp$interest==1] <- 55
  anesp$interest[anesp$interest==2] <- 44
  anesp$interest[anesp$interest==3] <- 3
  anesp$interest[anesp$interest==4] <- 2
  anesp$interest[anesp$interest==5] <- 1
  anesp$interest[anesp$interest==55] <- 5
  anesp$interest[anesp$interest==44] <- 4
  
  anesp$knowledge1[anesp$knowledge1!=2] <- 0
  anesp$knowledge1[anesp$knowledge1==2] <- 1
  anesp$knowledge1 <- as.numeric(anesp$knowledge1)
  
  anesp$knowledge2[anesp$knowledge2!=6] <- 0
  anesp$knowledge2[anesp$knowledge2==6] <- 1
  anesp$knowledge2 <- as.numeric(anesp$knowledge2)
  
  anesp$knowledge3[anesp$knowledge3!=2] <- 0
  anesp$knowledge3[anesp$knowledge3==2] <- 1
  anesp$knowledge3 <- as.numeric(anesp$knowledge3)
  
  anesp$knowledge4[anesp$knowledge4!=2] <- 0
  anesp$knowledge4[anesp$knowledge4==2] <- 1
  anesp$knowledge4 <- as.numeric(anesp$knowledge4)
  
  anesp$knowledge5[anesp$knowledge5!=3] <- 0
  anesp$knowledge5[anesp$knowledge5==3] <- 1
  anesp$knowledge5 <- as.numeric(anesp$knowledge5)
  
  anesp$knowledge6[anesp$knowledge6!=2] <- 0
  anesp$knowledge6[anesp$knowledge6==2] <- 1
  anesp$knowledge6 <- as.numeric(anesp$knowledge6)
  
  anesp$opinion1 <- as.numeric(anesp$opinion1)
  anesp$opinion1[anesp$opinion1==2 | anesp$opinion1==3] <- 0
  
  anesp$opinion2 <- as.numeric(anesp$opinion2)
  anesp$opinion2[anesp$opinion2==2 | anesp$opinion2==3] <- 0
  
  anesp$opinion3 <- as.numeric(anesp$opinion3)
  anesp$opinion3[anesp$opinion3==2 | anesp$opinion3==3] <- 0
  
  anesp$opinion4 <- as.numeric(anesp$opinion4)
  anesp$opinion4[anesp$opinion4==2 | anesp$opinion4==3] <- 0
  
  anesp$opinion5 <- as.numeric(anesp$opinion5)
  anesp$opinion5[anesp$opinion5==2 | anesp$opinion5==3] <- 0
  
  anesp$opinion6 <- as.numeric(anesp$opinion6)
  anesp$opinion6[anesp$opinion6==2 | anesp$opinion6==3] <- 0
  
  birth_years <- as.numeric(anesp$Birth.Year)
  birth_years[birth_years == 98] <- 0
  birth_years[birth_years == 99] <- -1
  years <- 2016 - (1998 - birth_years)
  anesp$age.years <- years
  
  anesp$Hispanic[anesp$Hispanic==2]<- 0
  
  anesp$Race[anesp$Race==1] <- "white"
  anesp$Race[anesp$Race==2] <- "black"
  anesp$Race[anesp$Race==3] <- "native american"
  anesp$Race[anesp$Race==4] <- "asian indian"
  anesp$Race[anesp$Race==5 | anesp$Race == 9] <- "japanese"
  anesp$Race[anesp$Race==6] <- "hawaiian"
  anesp$Race[anesp$Race==7] <- "chinese"
  anesp$Race[anesp$Race==8] <- "korean"
  anesp$Race[anesp$Race==10] <- "guamanian"
  anesp$Race[anesp$Race==11] <- "filipino"
  anesp$Race[anesp$Race==12] <- "vietnamese"
  anesp$Race[anesp$Race==13] <- "samoan"
  anesp$Race[anesp$Race==14] <- "other"
  anesp$Race[anesp$Hispanic==1] <- "hispanic"
  
  anesp$income[anesp$Salary==1] <- 1500
  anesp$income[anesp$Salary==2] <- 4000
  anesp$income[anesp$Salary==3] <- 6250
  anesp$income[anesp$Salary==4] <- 8750
  anesp$income[anesp$Salary==5] <- 10500
  anesp$income[anesp$Salary==6] <- 11750
  anesp$income[anesp$Salary==7] <- 13750
  anesp$income[anesp$Salary==8] <- 16000
  anesp$income[anesp$Salary==9] <- 18500
  anesp$income[anesp$Salary==10] <- 21000
  anesp$income[anesp$Salary==11] <- 23500
  anesp$income[anesp$Salary==12] <- 27500
  anesp$income[anesp$Salary==13] <- 32500
  anesp$income[anesp$Salary==14] <- 37500
  anesp$income[anesp$Salary==15] <- 42500
  anesp$income[anesp$Salary==16] <- 47500
  anesp$income[anesp$Salary==17] <- 55000
  anesp$income[anesp$Salary==18] <- 67500
  anesp$income[anesp$Salary==19] <- 82500
  anesp$income[anesp$Salary==20] <- 95000
  anesp$income[anesp$Salary==21] <- 105000
  anesp$income[anesp$Salary==22] <- 115000
  anesp$income[anesp$Salary==23] <- 127500
  anesp$income[anesp$Salary==24] <- 142500
  anesp$income[anesp$Salary==25] <- 150000
  anesp$income[anesp$Salary==26] <- NA
  
  return(anesp)
  
}

recode_values_all <- function(all_data){
  
  all_data <- all_data[all_data$Country == 187,]
  
  all_data$Education <- as.numeric(all_data$Education)
  all_data$Education[all_data$Education == 1] <- 44
  all_data$Education[all_data$Education == 2] <- 6.5
  all_data$Education[all_data$Education == 3] <- 100
  all_data$Education[all_data$Education == 4] <- 12
  all_data$Education[all_data$Education == 5] <- 14
  all_data$Education[all_data$Education == 6] <- 16
  all_data$Education[all_data$Education == 7] <- 17.5
  all_data$Education[all_data$Education == 8] <- 21
  all_data$Education[all_data$Education == 9] <- 20
  all_data$Education[all_data$Education == 10] <- 19
  all_data$Education[all_data$Education == 11] <- 20
  all_data$Education[all_data$Education == 44] <- 4
  all_data$Education[all_data$Education == 100] <- 10
  
  birth_years <- as.numeric(all_data$Birth.Year)
  birth_years[birth_years == 98] <- 0
  birth_years[birth_years == 99] <- -1
  years <- 2016 - (1998 - birth_years)
  all_data$age.years <- years
  
  all_data$income <- NA
  all_data$income[all_data$Salary==1] <- 1500
  all_data$income[all_data$Salary==2] <- 4000
  all_data$income[all_data$Salary==3] <- 6250
  all_data$income[all_data$Salary==4] <- 8750
  all_data$income[all_data$Salary==5] <- 10500
  all_data$income[all_data$Salary==6] <- 11750
  all_data$income[all_data$Salary==7] <- 13750
  all_data$income[all_data$Salary==8] <- 16000
  all_data$income[all_data$Salary==9] <- 18500
  all_data$income[all_data$Salary==10] <- 21000
  all_data$income[all_data$Salary==11] <- 23500
  all_data$income[all_data$Salary==12] <- 27500
  all_data$income[all_data$Salary==13] <- 32500
  all_data$income[all_data$Salary==14] <- 37500
  all_data$income[all_data$Salary==15] <- 42500
  all_data$income[all_data$Salary==16] <- 47500
  all_data$income[all_data$Salary==17] <- 55000
  all_data$income[all_data$Salary==18] <- 67500
  all_data$income[all_data$Salary==19] <- 82500
  all_data$income[all_data$Salary==20] <- 95000
  all_data$income[all_data$Salary==21] <- 105000
  all_data$income[all_data$Salary==22] <- 115000
  all_data$income[all_data$Salary==23] <- 127500
  all_data$income[all_data$Salary==24] <- 142500
  all_data$income[all_data$Salary==25] <- 150000
  all_data$income[all_data$Salary==26] <- NA
  
  all_data$Race[all_data$Race==1] <- "white"
  all_data$Race[all_data$Race==2] <- "black"
  all_data$Race[all_data$Race==3] <- "native american"
  all_data$Race[all_data$Race==4] <- "asian indian"
  all_data$Race[all_data$Race==5 | all_data$Race == 9] <- "japanese"
  all_data$Race[all_data$Race==6] <- "hawaiian"
  all_data$Race[all_data$Race==7] <- "chinese"
  all_data$Race[all_data$Race==8] <- "korean"
  all_data$Race[all_data$Race==10] <- "guamanian"
  all_data$Race[all_data$Race==11] <- "filipino"
  all_data$Race[all_data$Race==12] <- "vietnamese"
  all_data$Race[all_data$Race==13] <- "samoan"
  all_data$Race[all_data$Race==14] <- "other"
  all_data$Race[all_data$Hispanic==1] <- "hispanic"
  
  return(all_data)

}

## recodes raw data from MTurk ANESP replication to be used in analysis
recode_values_mturk <- function(anesp){
  
  anesp$female <- as.numeric(anesp$female)
  anesp$female[anesp$female == 1] <- 0
  anesp$female[anesp$female == 2] <- 1
  
  anesp$Education <- as.numeric(anesp$Education)
  anesp$Education[anesp$Education == 1] <- 55
  anesp$Education[anesp$Education == 2] <- 88
  anesp$Education[anesp$Education == 3] <- 100
  anesp$Education[anesp$Education == 4] <- 12
  anesp$Education[anesp$Education == 5] <- 14
  anesp$Education[anesp$Education == 6] <- 16
  anesp$Education[anesp$Education == 7] <- 18
  anesp$Education[anesp$Education == 8] <- 20
  anesp$Education[anesp$Education == 9] <- 20
  anesp$Education[anesp$Education == 10] <- 19
  anesp$Education[anesp$Education == 11] <- 20
  anesp$Education[anesp$Education == 55] <- 5
  anesp$Education[anesp$Education == 88] <- 8
  anesp$Education[anesp$Education == 100] <- 10
  
  
  anesp$marital[anesp$marital==1] <- "married"
  anesp$marital[anesp$marital==2] <- "divorced"
  anesp$marital[anesp$marital==3] <- "separated"
  anesp$marital[anesp$marital==4] <- "never married"
  anesp$marital[anesp$marital==5] <- "widowed"
  
  anesp$housing[anesp$housing==1] <- "own"
  anesp$housing[anesp$housing==2] <- "rent"
  anesp$housing[anesp$housing==3] <- "other"
  
  anesp$religion[anesp$religion==1] <- "protestant"
  anesp$religion[anesp$religion==2] <- "catholic"
  anesp$religion[anesp$religion==3] <- "jewish"
  anesp$religion[anesp$religion==4] <- "other"
  anesp$religion[anesp$religion==5] <- "none"
  
  anesp$registered <- as.numeric(anesp$registered)
  anesp$registered[anesp$registered==1] <- 1
  anesp$registered[anesp$registered==2] <- 0
  anesp$registered[anesp$registered==3] <- 99
  
  anesp$turnout <- as.numeric(anesp$turnout)
  anesp$turnout[anesp$turnout == 1] <- 1
  anesp$turnout[anesp$turnout == 2] <- 0
  anesp$turnout[anesp$turnout == 3] <- 99
  
  anesp$scale.2 <- as.numeric(anesp$scale.2)
  anesp$scale.3 <- as.numeric(anesp$scale.3)
  anesp$scale.4 <- as.numeric(anesp$scale.4)
  anesp$ideology.scale <- NA
  anesp$ideology.scale[anesp$scale.2==1] <- 1
  anesp$ideology.scale[anesp$scale.2==2] <- 2
  anesp$ideology.scale[anesp$scale.3==1] <- 7
  anesp$ideology.scale[anesp$scale.3==2] <- 6
  anesp$ideology.scale[anesp$scale.4==1] <- 3
  anesp$ideology.scale[anesp$scale.4==2] <- 5
  anesp$ideology.scale[anesp$scale.4==3] <- 4
  
  anesp$par.1[anesp$par.1==1] <- "republican"
  anesp$par.1[anesp$par.1==2] <- "democrat"
  anesp$par.1[anesp$par.1==3] <- "independent"
  anesp$par.1[anesp$par.1==4] <- "other"
  anesp$par.1[anesp$par.1==""] <- NA
  
  anesp$party.scale <- NA
  anesp$par.2 <- as.numeric(anesp$par.2)
  anesp$par.3 <- as.numeric(anesp$par.3)
  anesp$par.4 <- as.numeric(anesp$par.4)
  anesp$party.scale[anesp$par.2==1] <- 7
  anesp$party.scale[anesp$par.2==2] <- 6
  anesp$party.scale[anesp$par.3==1] <- 1
  anesp$party.scale[anesp$par.3==2] <- 2
  anesp$party.scale[anesp$par.4==1] <- 5
  anesp$party.scale[anesp$par.4==2] <- 3
  anesp$party.scale[anesp$par.4==3] <- 4
  
  anesp$interest <- as.numeric(anesp$interest)
  anesp$interest[anesp$interest==1] <- 55
  anesp$interest[anesp$interest==2] <- 44
  anesp$interest[anesp$interest==3] <- 3
  anesp$interest[anesp$interest==4] <- 2
  anesp$interest[anesp$interest==5] <- 1
  anesp$interest[anesp$interest==55] <- 5
  anesp$interest[anesp$interest==44] <- 4
  
  anesp$knowledge1[anesp$knowledge1!=2] <- 0
  anesp$knowledge1[anesp$knowledge1==2] <- 1
  anesp$knowledge1 <- as.numeric(anesp$knowledge1)
  
  anesp$knowledge2[anesp$knowledge2!=6] <- 0
  anesp$knowledge2[anesp$knowledge2==6] <- 1
  anesp$knowledge2 <- as.numeric(anesp$knowledge2)
  
  anesp$knowledge3[anesp$knowledge3!=2] <- 0
  anesp$knowledge3[anesp$knowledge3==2] <- 1
  anesp$knowledge3 <- as.numeric(anesp$knowledge3)
  
  anesp$knowledge4[anesp$knowledge4!=2] <- 0
  anesp$knowledge4[anesp$knowledge4==2] <- 1
  anesp$knowledge4 <- as.numeric(anesp$knowledge4)
  
  anesp$knowledge5[anesp$knowledge5!=3] <- 0
  anesp$knowledge5[anesp$knowledge5==3] <- 1
  anesp$knowledge5 <- as.numeric(anesp$knowledge5)
  
  anesp$knowledge6[anesp$knowledge6!=2] <- 0
  anesp$knowledge6[anesp$knowledge6==2] <- 1
  anesp$knowledge6 <- as.numeric(anesp$knowledge6)
  
  anesp$opinion1 <- as.numeric(anesp$opinion1)
  anesp$opinion1[anesp$opinion1==2 | anesp$opinion1==3] <- 0
  
  anesp$opinion2 <- as.numeric(anesp$opinion2)
  anesp$opinion2[anesp$opinion2==2 | anesp$opinion2==3] <- 0
  
  anesp$opinion3 <- as.numeric(anesp$opinion3)
  anesp$opinion3[anesp$opinion3==2 | anesp$opinion3==3] <- 0
  
  anesp$opinion4 <- as.numeric(anesp$opinion4)
  anesp$opinion4[anesp$opinion4==2 | anesp$opinion4==3] <- 0
  
  anesp$opinion5 <- as.numeric(anesp$opinion5)
  anesp$opinion5[anesp$opinion5==2 | anesp$opinion5==3] <- 0
  
  anesp$opinion6 <- as.numeric(anesp$opinion6)
  anesp$opinion6[anesp$opinion6==2 | anesp$opinion6==3] <- 0
  
  birth_years <- as.numeric(anesp$Birth.Year)
  birth_years[birth_years == 98] <- 0
  birth_years[birth_years == 99] <- -1
  years <- 2016 - (1998 - birth_years)
  anesp$age.years <- years
  
  anesp$Hispanic[anesp$Hispanic==2]<- 0
  
  anesp$Race[anesp$Race==1] <- "white"
  anesp$Race[anesp$Race==2] <- "black"
  anesp$Race[anesp$Race==3] <- "native american"
  anesp$Race[anesp$Race==4] <- "asian indian"
  anesp$Race[anesp$Race==5 | anesp$Race == 9] <- "japanese"
  anesp$Race[anesp$Race==6] <- "hawaiian"
  anesp$Race[anesp$Race==7] <- "chinese"
  anesp$Race[anesp$Race==8] <- "korean"
  anesp$Race[anesp$Race==10] <- "guamanian"
  anesp$Race[anesp$Race==11] <- "filipino"
  anesp$Race[anesp$Race==12] <- "vietnamese"
  anesp$Race[anesp$Race==13] <- "samoan"
  anesp$Race[anesp$Race==14] <- "other"
  anesp$Race[anesp$Hispanic==1] <- "hispanic"
  
  anesp$income <- NA
  anesp$income[anesp$Salary==1] <- 1500
  anesp$income[anesp$Salary==2] <- 4000
  anesp$income[anesp$Salary==3] <- 6250
  anesp$income[anesp$Salary==4] <- 8750
  anesp$income[anesp$Salary==5] <- 10500
  anesp$income[anesp$Salary==6] <- 11750
  anesp$income[anesp$Salary==7] <- 13750
  anesp$income[anesp$Salary==8] <- 16000
  anesp$income[anesp$Salary==9] <- 18500
  anesp$income[anesp$Salary==10] <- 21000
  anesp$income[anesp$Salary==11] <- 23500
  anesp$income[anesp$Salary==12] <- 27500
  anesp$income[anesp$Salary==13] <- 32500
  anesp$income[anesp$Salary==14] <- 37500
  anesp$income[anesp$Salary==15] <- 42500
  anesp$income[anesp$Salary==16] <- 47500
  anesp$income[anesp$Salary==17] <- 55000
  anesp$income[anesp$Salary==18] <- 67500
  anesp$income[anesp$Salary==19] <- 82500
  anesp$income[anesp$Salary==20] <- 95000
  anesp$income[anesp$Salary==21] <- 105000
  anesp$income[anesp$Salary==22] <- 115000
  anesp$income[anesp$Salary==23] <- 127500
  anesp$income[anesp$Salary==24] <- 142500
  anesp$income[anesp$Salary==25] <- 150000
  anesp$income[anesp$Salary==26] <- NA
  
  return(anesp)
  
}


## make a new data frame with latitude and longitide data to be used to 
## locate region
make_location <- function(anesp){
  lat <- as.numeric(anesp$LocationLatitude)
  long <- as.numeric(anesp$LocationLongitude)
  
  location <- na.omit(data.frame(cbind(long,lat)))
  
  return(location)
}


## change latitude and longitude values to state names
latlong2state <- function(pointsDF) {
  states <- map('state', fill=TRUE, col="transparent", plot=FALSE)
  IDs <- sapply(strsplit(states$names, ":"), function(x) x[1])
  states_sp <- map2SpatialPolygons(states, IDs=IDs,
                                   proj4string=CRS("+proj=longlat +datum=WGS84"))
  
  # Convert pointsDF to a SpatialPoints object 
  pointsSP <- SpatialPoints(pointsDF, 
                            proj4string=CRS("+proj=longlat +datum=WGS84"))
  
  # Use 'over' to get _indices_ of the Polygons object containing each point 
  indices <- over(pointsSP, states_sp)
  
  # Return the state names of the Polygons object containing each point
  stateNames <- sapply(states_sp@polygons, function(x) x@ID)
  stateNames[indices]
}

## takes a dataset with state names and provides regions for the given states
get_region <- function(states){
  states$region <- NA
  states$region[states$states == "maine" | states$states == "new hampshire" | 
                  states$states == "vermont" | states$states == "massachusetts" |
                  states$states == "rhode island" | states$states == "connecticut" | 
                  states$states == "new york" | states$states == "new jersey" |
                  states$states == "pennsylvania"] <- "northeast"
  states$region[states$states == "delaware" | states$states == "maryland" |
                  states$states == "west virginia" | states$states == "virginia" | 
                  states$states == "kentucky" | states$states == "tennessee"
                | states$states == "north carolina" | states$states =="south carolina"
                | states$states == "georgia" | states$states == "florida"
                | states$states =="alabama" | states$states =="mississippi"
                | states$states =="louisiana" | states$states =="arkansas"
                | states$states == "oklahoma" | states$states =="texas"
                | states$states == "district of columbia"] <- "south"
  states$region[states$states =="ohio" | states$states =="michigan" 
                | states$states =="indiana" | states$states =="illinois"
                | states$states =="wisconsin"| states$states =="missouri"
                | states$states =="iowa" | states$states =="minnesota"
                | states$states =="kansas" | states$states =="nebraska"
                | states$states =="south dakota"| states$states =="north dakota"] <- "midwest"
  states$region[states$states =="new mexico"| states$states =="colorado"
                | states$states =="wyoming"| states$states =="montana"
                | states$states =="arizona"| states$states =="utah"
                | states$states =="idaho"| states$states =="california"
                | states$states =="nevada"| states$states =="oregon"
                | states$states =="washington" | states$states =="hawaii"
                | states$states =="alaska"] <- "west"
  
  return(states)
}

## functions to return standard errors for numeric values
SE <- function(x) sd(na.omit(x))/sqrt(length(na.omit(x)))
## functions to return standard errors for proportions
SE.prop <-function(x) sqrt(mean(na.omit(x))*(1-mean(na.omit(x)))/length(na.omit(x)))



